import codecs
import random


# Build a fixed-size random sample of prompts whose length is close to the
# mean prompt length, and write the sample one prompt per line.

# Fixed seed so the shuffle below (and therefore the sample) is reproducible.
random.seed(222)


input_path = '/home/rickwwang/project_own/story_generation/data/test.wp_target_500.srl2.format.prompt'
result_path = '/home/rickwwang/project_research/fairseq/data-bin/prompt.txt.test'

# Read inside a context manager so the input handle is closed promptly
# instead of being left open for the lifetime of the process.
with codecs.open(input_path, 'r', encoding='utf8') as fin:
    all_prompts = fin.readlines()

mean_length = 28
# Half-width of the accepted length range around mean_length (exclusive).
length_window = 10
# Upper bound on the number of prompts written out.
max_prompts = 1000

# Use list comprehensions rather than map()/filter(): on Python 3 those
# return lazy iterators, which random.shuffle() and slicing cannot handle.
all_prompts = [line.strip() for line in all_prompts]
# Keep prompts whose space-separated token count lies strictly between
# mean_length - length_window and mean_length + length_window.
all_prompts = [
    prompt for prompt in all_prompts
    if mean_length - length_window < len(prompt.split(' ')) < mean_length + length_window
]
random.shuffle(all_prompts)
# Slicing is safe even when fewer than max_prompts prompts survive the filter.
all_prompts = all_prompts[:max_prompts]

with codecs.open(result_path, 'w', encoding='utf8') as fout:
    fout.write('\n'.join(all_prompts))
    fout.write('\n')